* Session setup for the bootstrap prediction script.
* Start from an empty session and disable the -more- pause so the script runs unattended.
clear all
set more off
* Raise the matrix-size and variable-count limits.
set matsize 10000
set maxvar 10000
* Append the project's gslab_tools directory to the ado search path.
adopath + ../code/gslab_tools/

* NOTE(review): -preliminaries- is not a built-in command; presumably it is supplied by the
* gslab_tools directory added above. The meaning of doutf() is not visible here -- confirm.
preliminaries, doutf(../derived/Prediction)
* Fix the random-number seed so the bootstrap resampling below is reproducible.
set seed 0227

* Load the arm-level data, keeping only rows whose year is not system-missing.
use "../derived/Combined/combined_arm_level.dta" if year != ., clear

* Placebo arms are assigned a minimum dose of zero.
replace dose_min = 0 if drug == "placebo"

* For every trial characteristic: create a <name>_missing indicator, then
* replace the missing entries with the mean of the observed values.
foreach x of varlist mean_age share_female dose_min no_randomised ///
	dropout_share lengthoftrial stdz_baseline {
	generate `x'_missing = (`x' == .)
	summarize `x'
	replace `x' = `r(mean)' if `x' == .
}

* Store the imputed data set; each bootstrap replication reloads it.
save ../derived/Prediction/combined_chars.dta, replace

* Covariate blocks for the LASSO specification. They are assembled once, and the
* order of terms in the final command is the same as in the written-out list.
local slopes_first
foreach x in mean_age share_female dropout_share stdz_baseline {
	local slopes_first `slopes_first' c.`x'#i.drug_group
}
local slopes_second
foreach x in dose_min no_randomised lengthoftrial {
	local slopes_second `slopes_second' c.`x'#i.drug_group
}
local group_dummies i.setting_group i.status_group i.center_group ///
	i.placebo_group i.scale_group i.year_group

* 100 bootstrap replications: resample whole studies with replacement, refit the
* models on the resample, and save that replication's linear predictions.
forvalues b = 1/100 {
	use ../derived/Prediction/combined_chars.dta, clear

	bsample, cluster(studyname)

	* LASSO prediction using all characteristics jointly
	lasso linear stdz_outcome_all `slopes_first' *_missing `slopes_second' `group_dummies'
	predict predict_all_`b', xb

	* One regression per characteristic; the adjusted R-squared of each fit is
	* kept in a global named predict_<characteristic>_<replication>_r2
	foreach x in no_randomised lengthoftrial dose_min stdz_baseline dropout_share ///
		mean_age share_female {
		regress stdz_outcome_all c.`x'#i.drug_group i.scale_group i.year_group
		predict predict_`x'_`b', xb
		global predict_`x'_`b'_r2 = `e(r2_a)'
	}

	* Keep identifiers and predictions only, and collapse rows repeated by the resampling
	keep studyname drug unique_id predict*
	duplicates drop
	save ../derived/Prediction/predictions_`b'.dta, replace
}

* Combine the per-replication prediction files into one data set: start from
* replication 1 and merge replications 2 through 100 onto it one at a time.
use ../derived/Prediction/predictions_1.dta, clear
local rep = 2
while `rep' <= 100 {
	merge 1:1 studyname drug unique_id using ../derived/Prediction/predictions_`rep'.dta, nogenerate
	local ++rep
}
save ../derived/Prediction/prediction_bstrap.dta, replace

*Clean up intermediate files (best effort: -capture- ignores files that are already gone)
forvalues bstrap=1/100 {
	cap erase ../derived/Prediction/predictions_`bstrap'.dta
}
* The imputed covariate file is saved under ../derived/Prediction/ earlier in this
* script, so it must be erased from that same directory.
cap erase ../derived/Prediction/combined_chars.dta